import requests as req
from pyquery import PyQuery as pq
import re
import time
import xlwt
import csv
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from pyquery import PyQuery as pq
import time


class DouBanCrawler:
    """Scrape short comments of a Douban movie through a Chrome WebDriver.

    The crawler owns a Selenium Chrome driver (``self.driver``) created in
    ``__init__``.  Call :meth:`close` (or use the instance as a context
    manager) when finished so the browser process is released.
    """

    # Number of comments requested per page (the ``limit`` query parameter).
    _PAGE_SIZE = 20
    # Matches the first run of digits inside the rating element's class value.
    _DIGITS = re.compile(r'\d+')

    def __init__(self, max_page, sleep_seconds, is_show=False):
        """Create the crawler and start a Chrome driver.

        Args:
            max_page: Number of comment pages to fetch in ``get_comment``.
            sleep_seconds: Pause, in seconds, after each fetched page.
            is_show: When true, open a visible Chrome window; otherwise run
                Chrome headless.
        """
        self.max_page = max_page
        self.sleep_seconds = sleep_seconds
        if is_show:
            self.driver = webdriver.Chrome()
        else:
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            # ``options=`` replaces the deprecated ``chrome_options=`` keyword,
            # which newer Selenium releases no longer accept.
            self.driver = webdriver.Chrome(options=chrome_options)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Quit the underlying browser; safe to call when no driver exists."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.quit()

    @classmethod
    def _parse_star(cls, class_attr):
        """Convert a rating element's ``class`` value into a numeric score.

        The first run of digits in ``class_attr`` is divided by 10, so a value
        containing ``40`` yields ``4.0``.

        Args:
            class_attr: The ``class`` attribute text, or ``None`` when the
                comment carries no rating element.

        Returns:
            The score as a float, or ``None`` when ``class_attr`` is empty or
            contains no digits.
        """
        if not class_attr:
            return None
        match = cls._DIGITS.search(class_attr)
        if match is None:
            return None
        return float(match.group()) / 10

    def get_comment(self, obj_id):
        """Fetch up to ``self.max_page`` pages of comments for one movie.

        Args:
            obj_id: Integer Douban subject id; it is interpolated into the
                comments URL with ``%d``.

        Returns:
            A list of ``[user, star, like, date, comment]`` rows accumulated
            over all pages.  ``star`` is a float, or ``None`` when the comment
            has no parsable rating.
        """
        step = self._PAGE_SIZE
        sleep_seconds = self.sleep_seconds
        data_list = []
        for offset in range(0, self.max_page * step, step):
            # Integer division keeps the page number an int for ``%d``.
            print('获取第 %d 页' % (offset // step + 1))
            base_url = 'https://movie.douban.com/subject/%d/comments?start=%d&limit=%d&status=P' % (
                obj_id, offset, step)
            self.driver.get(base_url)
            doc = pq(self.driver.page_source)
            print('url: %s' % base_url)
            for item in doc('#comments > div > div.comment').items():
                # User name
                user = item('h3 > span.comment-info > a').text()
                # Comment date
                date = item('h3 > span.comment-info > span.comment-time').text()
                # Comment text
                comment = item('p').text()
                # Rating, encoded in the class attribute of the rating span
                star = self._parse_star(
                    item('h3 > span.comment-info > span.rating').attr('class'))
                # Number of up-votes
                like = item('h3 > span.comment-vote > span.votes').text()
                data_list.append([user, star, like, date, comment])
            # The count printed here is cumulative across pages.
            print('当前批次已经获取到的数据量 : %d' % len(data_list))
            print('休眠%d秒' % sleep_seconds)
            # Pause between pages (crawl interval).
            time.sleep(sleep_seconds)
        return data_list

    @staticmethod
    def save_to_csv(data_list, path, save_type='w'):
        """Write rows to a CSV file.

        Args:
            data_list: Iterable of rows, each an iterable of cell values.
            path: Destination file path.
            save_type: Mode passed to ``open`` (``'w'`` overwrites, ``'a'``
                appends).  The file is encoded as ``utf-8-sig``.
        """
        with open(path, save_type, encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(data_list)
        print('文件写入成功')

    def login(self, username, password):
        """Log in to douban.com with an account name and password.

        The login form is reached by switching into the first ``iframe`` of
        the home page.  Success is detected by the presence of the user-name
        element in the global navigation bar.

        Args:
            username: Account name typed into the ``username`` field.
            password: Password typed into the ``password`` field.

        Raises:
            SystemExit: When the logged-in user element is not found; the
                driver is quit first.  The exit status stays 0 to match the
                previous behaviour.
        """
        # Local import: the ``find_element_by_*`` helpers used previously were
        # removed from Selenium, while ``find_element(By..., ...)`` is
        # available across Selenium releases.
        from selenium.webdriver.common.by import By

        self.driver.get('https://www.douban.com/')

        iframe = self.driver.find_element(By.TAG_NAME, 'iframe')
        self.driver.switch_to.frame(iframe)
        self.driver.find_element(By.CLASS_NAME, 'account-tab-account').click()
        self.driver.find_element(By.ID, 'username').send_keys(username)
        self.driver.find_element(By.ID, 'password').send_keys(password)
        self.driver.find_element(By.CLASS_NAME, 'btn-account').click()
        # Fixed wait for the post-login page to load.
        time.sleep(5)
        matches = self.driver.find_elements(
            By.CSS_SELECTOR,
            '#db-global-nav > div > div.top-nav-info > ul > li.nav-user-account > a > span:nth-child(1)')
        if not matches:
            print('登录失败')
            self.driver.quit()
            # ``raise SystemExit`` instead of ``quit()``: the latter is
            # injected by the ``site`` module and is not always defined.
            raise SystemExit(0)
        un = matches[0].text
        if un:
            print('登录成功:%s' % un)
        time.sleep(3)
